library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.0
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(leaflet)
You can also embed plots, for example:
df %>%
group_by(speed_limit)%>%
summarise(proportion_severe = mean(accident_severity))
## # A tibble: 6 × 2
## speed_limit proportion_severe
## <int> <dbl>
## 1 20 0.157
## 2 30 0.197
## 3 40 0.236
## 4 50 0.248
## 5 60 0.327
## 6 70 0.246
date_df <-
df%>%
group_by(date)%>%
summarise(proportion_severe = mean(accident_severity))
ggplot(date_df, aes(x=date, y=proportion_severe ))+
geom_point()+
geom_smooth()+
annotate('rect',xmin=as.Date("2020-03-23"),xmax=as.Date("2020-06-15"),ymin=0.1,ymax=Inf, fill='red', alpha=0.25) #Start of lockdown
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# geom_vline(xintercept=as.Date("2020-06-15")) #Non Essential shops reopen
#No immediately clear winter effect by quarter/month
time_df<-
df%>%
mutate(hour = hour(time))%>%
mutate(month = month(date))%>%
group_by(hour, month)%>%
summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'hour'. You can override using the
## `.groups` argument.
ggplot(time_df, aes(x=hour, y=accident_proportion))+
geom_point()+
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#No immediately clear winter effect by quarter/month
time_df<-
df%>%
mutate(hour = hour(time))%>%
mutate(day =week(date))%>%
group_by(hour, day)%>%
summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'hour'. You can override using the
## `.groups` argument.
ggplot(time_df, aes(x=hour, y=accident_proportion, group=day))+
geom_line()
lsoa_df <-
read.csv('./data/LSOA_(2011)_to_LSOA_(2021)_to_Local_Authority_District_(2022)_Lookup_for_England_and_Wales.csv')
lad_prop_df<-
df %>%
merge(.,lsoa_df, by.x="lsoa_of_accident_location", by.y="LSOA21CD")%>%
group_by(LAD22CD)%>%
summarise(accident_proportion = mean(accident_severity))
uk_regions <- geojsonio::geojson_read("https://martinjc.github.io/UK-GeoJSON/json/eng/topo_lad.json", what = "sp")
## Registered S3 method overwritten by 'geojsonsf':
## method from
## print.geojson geojson
uk_regions <- uk_regions[uk_regions$id %in% lad_prop_df$LAD22CD, ]
bins <-c(0,0.1,0.2,0.3,0.4,0.5,0.6)
pal <- colorBin("Reds", domain = lad_prop_df$accident_proportion, bins=bins )
leaflet(uk_regions)%>%
addTiles('https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}') %>%
addPolygons(stroke = FALSE, smoothFactor = 0.3,opacity = 1,fillOpacity = 0.7,dashArray = "3",
fillColor = ~pal(lad_prop_df$accident_proportion),)%>%
addLegend(pal = pal, values = lad_prop_df$accident_proportion, opacity = 0.7, title = NULL,
position = "bottomright")
mdl <- glm(accident_severity ~ speed_limit +night+weekend+season,data = df, family = binomial())
summary(mdl)
##
## Call:
## glm(formula = accident_severity ~ speed_limit + night + weekend +
## season, family = binomial(), data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0845 -0.6904 -0.6503 -0.6025 1.9368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0452614 0.0264636 -77.286 < 2e-16 ***
## speed_limit 0.0167986 0.0005489 30.605 < 2e-16 ***
## night 0.3565591 0.0290797 12.261 < 2e-16 ***
## weekend 0.0880066 0.0288110 3.055 0.00225 **
## seasonspring 0.2024068 0.0246309 8.218 < 2e-16 ***
## seasonsummer 0.1406371 0.0220018 6.392 1.64e-10 ***
## seasonautumn 0.0949846 0.0217818 4.361 1.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95283 on 91186 degrees of freedom
## Residual deviance: 94127 on 91180 degrees of freedom
## AIC: 94141
##
## Number of Fisher Scoring iterations: 4
speeds <- seq(0,100,1)
n_samples <- length(speeds)
newdata = list(
speed_limit=speeds,
night=rep(0,n_samples),
weekend=rep(0,n_samples),
season = rep('winter',n_samples))
predicted_vals <- predict(mdl,type = "response", newdata= newdata, se.fit=TRUE)
results <-
tibble(
speeds =speeds,
mu = predicted_vals$fit,
upper_ci = predicted_vals$fit +1.96*predicted_vals$se.fit,
lower_ci = predicted_vals$fit -1.96*predicted_vals$se.fit
)
newdata_summer = list(
speed_limit=speeds,
night=rep(0,n_samples),
weekend=rep(0,n_samples),
season = rep('summer',n_samples))
predicted_vals <- predict(mdl,type = "response", newdata= newdata_summer, se.fit=TRUE)
results_summer <-
tibble(
speeds =speeds,
mu = predicted_vals$fit,
upper_ci = predicted_vals$fit +1.96*predicted_vals$se.fit,
lower_ci = predicted_vals$fit -1.96*predicted_vals$se.fit
)
ggplot(data=results, aes(x=speeds, y=mu))+
geom_line()+
geom_line(data=results_summer)+
geom_ribbon(data=results_summer,aes(ymin=lower_ci, ymax=upper_ci), fill='red', alpha=0.5)+
geom_ribbon(aes(ymin=lower_ci, ymax=upper_ci), fill='red', alpha=0.5)
#plot( seq(-0,70,1),predict(mdl,type = "response", newdata = list(speed_limit=seq(-0,70,1), night=rep(0,length(seq(0,70,1))))))
vehicle_df <-
read_csv('./data/dft-road-casualty-statistics-vehicle-2020.csv.xls')
## Rows: 167375 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): accident_index, accident_reference, generic_make_model
## dbl (24): accident_year, vehicle_reference, vehicle_type, towing_and_articul...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vehicle_df <-
merge(vehicle_df,df, by="accident_index")
sun_in_eyes <-
vehicle_df%>%
filter(vehicle_direction_from == 7|vehicle_direction_from == 3)%>%
mutate(month=month(date))%>%
mutate(hour=hour(time))%>%
filter(hour>6)%>%
group_by(month, hour)%>%
summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
ggplot(sun_in_eyes, aes(hour, accident_proportion, color=month))+
geom_point()
df <-
read_csv('./data/dft-road-casualty-statistics-casualty-2020.csv.xls')
## Rows: 115584 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): accident_index, accident_reference
## dbl (16): accident_year, vehicle_reference, casualty_reference, casualty_cla...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.